home *** CD-ROM | disk | FTP | other *** search
- /*************************************************************************************
- #
- # Encoder.c
- #
- # The guts of the text encoder sample. This reads a text file, attempts to determine
- # the encoding, and then converts it to a different encoding.
- #
- # Author: Timothy Carroll
- # Apple Developer Technical Support
- # timc@apple.com
- #
- # Revision: Jason Yeo
- #
- # Modification History:
- #
- # 2/9/97 TMC Initial Release
- #
- # 9/12/97 JY Updated for:
- # TEC 1.2.1
- # Universal Interfaces 3.0
- # CodeWarrior 11 projects
- #
- # Copyright © 1997 Apple Computer, Inc., All Rights Reserved
- #
- #
- # You may incorporate this sample code into your applications without
- # restriction, though the sample code has been provided "AS IS" and the
- # responsibility for its operation is 100% yours. However, what you are
- # not permitted to do is to redistribute the source as "DSC Sample Code"
- # after having made changes. If you're going to re-distribute the source,
- # we require that you make it clear in the source that the code was
- # descended from Apple Sample Code, but that you've made changes.
- #
- *************************************************************************************/
-
- #include "Encoder.h"
- #include "TextCommon.h"
- #include "TextEncodingConverter.h"
- #include "Errors.h"
- #include <Files.h>
-
// Size, in bytes, of the scratch buffer that receives converted text (32 KB).
// The expression is parenthesized so it stays a single value when the macro is
// expanded next to other operators (e.g. `x / kOutputBufferSize`).
#define kOutputBufferSize (1024*32)
-
- OSStatus ReadFile (FSSpec *file, Ptr *bufferPtr, SInt32 *length);
- OSStatus FindBestEncoding (Ptr buffer, SInt32 length, TextEncoding *bestEncoding);
-
- OSStatus EncodeFile (FSSpec *inputFile)
- {
- OSStatus theErr = noErr;
- OSStatus convertStatus = noErr;
-
- short outputRef = 0;
- Boolean outputFileOpened = false;
- FSSpec outputFile;
- short volume;
-
- Ptr inputBuffer = NULL, outputBuffer = NULL;
- long length;
- TextEncoding inputEncoding;
- TECObjectRef converter = NULL;
- char nameIndex;
-
- ByteCount inputLength;
- ByteCount actualInputLength, actualOutputLength;
- TextPtr inputPtr;
-
-
- // STEP 1 READ THE FILE
- theErr = ReadFile (inputFile, &inputBuffer, &length);
- FAIL_OSERR (theErr, "\pError: Failed to read the file");
-
- // STEP 2 SNIFF FOR ENCODING
- theErr = FindBestEncoding (inputBuffer, length, &inputEncoding);
- FAIL_OSERR (theErr, "\pError: Failed to find a good encoding")
-
- // STEP 3 CREATE A CONVERTER
- theErr = TECCreateConverter (&converter, inputEncoding, gPreferences.outputEncoding);
- FAIL_OSERR (theErr, "\pError: Unable to create a converter between the two encodings")
-
- // STEP 4 CREATE AN OUTPUT FILE
- // For now, we delete any existing file with the name. Our name is the old name, with
- // .OUT appended to the end. We cut short the name if it would be greater than 31 chars.
-
- outputFile = *inputFile;
- nameIndex = outputFile.name[0];
-
- nameIndex += 4;
- if (nameIndex > 31)
- nameIndex = 31;
-
- outputFile.name[0] = nameIndex;
-
- nameIndex -= 3;
- outputFile.name[nameIndex] = '.';
- outputFile.name[nameIndex+1] = 'O';
- outputFile.name[nameIndex+2] = 'U';
- outputFile.name[nameIndex+3] = 'T';
-
- (void) FSpDelete (&outputFile);
-
- theErr = FSpCreate (&outputFile, 'DENC', 'TEXT', smSystemScript);
- FAIL_OSERR (theErr, "\pError: Failed to create the output file");
-
- theErr = FSpOpenDF (&outputFile, fsRdWrPerm, &outputRef);
- FAIL_OSERR (theErr, "\pError: Unable to open the output file")
- outputFileOpened = true;
-
- theErr = SetEOF (outputRef, 0);
- FAIL_OSERR (theErr, "\pError: Failed to reset the output file's length")
-
- theErr = SetFPos (outputRef, fsFromStart, 0);
- FAIL_OSERR (theErr, "\pError: Unable to move to start of file")
-
- // First we need an output buffer
-
- outputBuffer = NewPtrClear (kOutputBufferSize);
- theErr = MemError();
- FAIL_OSERR (theErr, "\pError: Failed to create the output buffer")
- FAIL_NIL (outputBuffer, "\pError: Failed to create the output buffer")
-
-
- // STEP 5 CONVERT THE FILE
- // We iterate over the input buffer, converting blocks of text into the
- // output buffer and copying those files out to the output file.
-
- inputLength = length;
- inputPtr = (unsigned char *) inputBuffer;
-
- do
- {
- convertStatus = TECConvertText (converter, inputPtr, inputLength, &actualInputLength,
- (unsigned char *) outputBuffer, kOutputBufferSize, &actualOutputLength);
-
- // Advance the counters
- inputPtr += actualInputLength;
- inputLength -= actualInputLength;
-
- theErr = FSWrite (outputRef, (long *) &actualOutputLength, outputBuffer);
- FAIL_OSERR( theErr, "\pError: Failed to write output data")
-
- } while (convertStatus == kTECUsedFallbacksStatus);
-
- do
- {
- convertStatus = TECFlushText (converter, (unsigned char *) outputBuffer,
- kOutputBufferSize, &actualOutputLength);
-
- theErr = FSWrite (outputRef, (long *) &actualOutputLength, outputBuffer);
- FAIL_OSERR( theErr, "\pError: Failed to write output data")
- }
- while (convertStatus == kTECNeedFlushStatus);
-
- theErr = GetVRefNum (outputRef, &volume);
- FAIL_OSERR (theErr, "\pError: Failed to retrieve the volume ref num")
-
- theErr = FlushVol (NULL, volume);
- FAIL_OSERR (theErr, "\pError: Failed to flush the volume of the output file")
-
- // we're done!!
- // Close everything and exit.
-
- goto cleanup;
-
- error:
- if (theErr == noErr)
- theErr = paramErr;
-
- cleanup:
- if (outputFileOpened)
- (void) FSClose (outputRef);
- if (inputBuffer)
- DisposePtr (inputBuffer);
- if (outputBuffer)
- DisposePtr (outputBuffer);
- if (converter)
- (void) TECDisposeConverter (converter);
-
- return theErr;
- }
-
-
- // This routine simple opens up the file and reads the entire contents into a data buffer.
- OSStatus ReadFile (FSSpec *file, Ptr *bufferPtr, SInt32 *length)
- {
- OSStatus theErr = noErr;
- short fileRef;
- Boolean fileOpened = false;
- Ptr fileBuffer = NULL;
- long actualLength;
-
- theErr = FSpOpenDF (file, fsRdPerm, &fileRef);
- FAIL_OSERR (theErr, "\pError: Unable to open the file")
- fileOpened = true;
-
- theErr = GetEOF (fileRef, length);
- FAIL_OSERR (theErr, "\pError: Unable to get the file's length")
-
- theErr = SetFPos (fileRef, fsFromStart, 0);
- FAIL_OSERR (theErr, "\pError: Unable to move to start of file")
-
- fileBuffer = NewPtrClear (*length);
- theErr = MemError();
- FAIL_OSERR (theErr, "\pError: Failed to create a buffer to hold the file data")
- FAIL_NIL (fileBuffer, "\pError: Failed to create a buffer to hold the file data")
-
- actualLength = *length;
- theErr = FSRead(fileRef, &actualLength, fileBuffer);
- FAIL_OSERR (theErr, "\pError: Failed to read the file")
- if (actualLength != *length) SIGNAL_ERROR ("\pError: Incorrect length of info read")
-
- *bufferPtr = fileBuffer;
-
- goto cleanup; // we're done, close all the files and exit.
-
- error:
- if (theErr == noErr)
- theErr = paramErr;
-
- if (fileBuffer)
- DisposePtr (fileBuffer);
-
- // We returned an error, so we should not return any valid buffer info.
- *bufferPtr = NULL;
- *length = 0;
-
- cleanup:
-
- if (fileOpened)
- (void) FSClose (fileRef);
-
- return theErr;
- }
-
-
- OSStatus FindBestEncoding (Ptr buffer, SInt32 length, TextEncoding *bestEncoding)
- {
- OSStatus theErr = noErr;
- ItemCount maxRegionEncodings, actualRegionEncodings;
- ItemCount maxSnifferEncodings, actualSnifferEncodings;
- TextEncoding *regionEncodings = NULL, *snifferEncodings = NULL;
-
- TECSnifferObjectRef sniffer = NULL;
- ItemCount *errors = NULL, *features = NULL;
-
- int loop, loop2;
- Boolean found;
-
- // Create a buffer that holds all the encodings for the region
-
- theErr = TECCountWebTextEncodings (gPreferences.locale, &maxRegionEncodings);
- FAIL_OSERR (theErr, "\pError: Failed to retrieve count of encodings")
-
- regionEncodings = (TextEncoding *) NewPtrClear (sizeof (TextEncoding) * maxRegionEncodings);
- theErr = MemError();
- FAIL_OSERR (theErr, "\pError: Failed to allocate a buffer to hold encodings")
- FAIL_NIL (regionEncodings, "\pError: Failed to allocate a buffer to hold encodings")
-
- theErr = TECGetWebTextEncodings (gPreferences.locale, regionEncodings,
- maxRegionEncodings, &actualRegionEncodings);
- FAIL_OSERR (theErr, "\pError: Failed to retrieve list of encodings")
-
-
- // Create a buffer that holds all of the encodings we can sniff
-
- theErr = TECCountAvailableSniffers (&maxSnifferEncodings);
- FAIL_OSERR (theErr, "\pError: Failed to retrieve count of encodings")
-
- snifferEncodings = (TextEncoding *) NewPtrClear (sizeof (TextEncoding) * maxSnifferEncodings);
- theErr = MemError();
- FAIL_OSERR (theErr, "\pError: Failed to allocate a buffer to hold encodings")
- FAIL_NIL (snifferEncodings, "\pError: Failed to allocate a buffer to hold encodings")
-
- theErr = TECGetAvailableSniffers (snifferEncodings, maxSnifferEncodings, &actualSnifferEncodings);
- FAIL_OSERR (theErr, "\pError: Failed to retrieve list of encodings")
-
-
- // Remove all encodings from the region list that don't exist in the sniffer list.
- // After each iteration of the loop, we either advance the loop counter or shrink
- // the list by one.
-
- loop = 0;
- while (loop < actualRegionEncodings)
- {
- // Scan the list to see if we've found a match
- found = false;
- for (loop2 = 0; loop2 < actualSnifferEncodings; loop2++)
- {
- if (regionEncodings[loop] == snifferEncodings[loop2])
- {
- found = true;
- loop2 = actualSnifferEncodings; // break out of loop
- }
- }
-
- // Did we find a match? If so, the region code lives
- if (found)
- loop++;
- else
- // If not, the region code dies, and we shrink the list. We decrement
- // the list by one, and copy the last item on the list to the current
- // loop location. We do this even if we're the last item on the list...
- // it won't be used later, and no reason to do a compare.
- {
- actualRegionEncodings -=1;
- regionEncodings[loop] = regionEncodings[actualRegionEncodings];
- }
- }
-
- if (actualRegionEncodings == 0)
- SIGNAL_ERROR ("\pError: Unable to find a sniffer that works for this region")
-
- // Create a sniffer object for our list of encodings.
- theErr = TECCreateSniffer (&sniffer, regionEncodings, actualRegionEncodings);
- FAIL_OSERR (theErr, "\pError: Unable to create a sniffer object.")
-
- // Create buffers to hold the return results of the sniffer
-
- errors = (ItemCount *) NewPtrClear (sizeof (ItemCount) * actualRegionEncodings);
- theErr = MemError();
- FAIL_OSERR (theErr, "\pError: Failed to allocate a buffer to hold sniffer errors")
- FAIL_NIL (errors, "\pError: Failed to allocate a buffer to hold sniffer errors")
-
- features = (ItemCount *) NewPtrClear (sizeof (ItemCount) * actualRegionEncodings);
- theErr = MemError();
- FAIL_OSERR (theErr, "\pError: Failed to allocate a buffer to hold sniffer features")
- FAIL_NIL (features, "\pError: Failed to allocate a buffer to hold sniffer features")
-
- // Finally, we try to find out the actual encoding!!!
-
- theErr = TECSniffTextEncoding (sniffer, (unsigned char *)buffer, length,
- regionEncodings, actualRegionEncodings,
- errors, kMaxErrors,
- features, kMaxFeatures);
- FAIL_OSERR (theErr, "\pError: Failed to examine the encoding")
-
- // We made it this far. The first encoding is our best guess, so return it and cleanup;
- *bestEncoding = *regionEncodings;
-
- goto cleanup;
-
- error:
- if (theErr == noErr)
- theErr = paramErr;
-
- cleanup:
- if (sniffer)
- TECDisposeSniffer(sniffer);
- if (regionEncodings)
- DisposePtr ((Ptr) regionEncodings);
- if (snifferEncodings)
- DisposePtr ((Ptr) snifferEncodings);
- if (errors)
- DisposePtr ((Ptr) errors);
- if (features)
- DisposePtr ((Ptr) features);
-
- return theErr;
- }
-